Skip to content

Conversation

@clementval
Copy link
Contributor

No description provided.

@clementval clementval requested a review from wangzpgi March 7, 2025 01:31
@llvmbot llvmbot added the `flang` (Flang issues not falling into any other category) and `flang:fir-hlfir` labels Mar 7, 2025
@llvmbot
Copy link
Member

llvmbot commented Mar 7, 2025

@llvm/pr-subscribers-flang-fir-hlfir

Author: Valentin Clement (バレンタイン クレメン) (clementval)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/130218.diff

2 Files Affected:

  • (modified) flang/module/cudadevice.f90 (+100)
  • (modified) flang/test/Lower/CUDA/cuda-device-proc.cuf (+18)
diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
index 4491e9f653270..baaa112f5d8c2 100644
--- a/flang/module/cudadevice.f90
+++ b/flang/module/cudadevice.f90
@@ -1118,6 +1118,31 @@ attributes(device) integer function match_any_syncjd(mask, val)
       !dir$ ignore_tkr (d) x
       complex(8), intent(in) :: x
     end function
+    attributes(device) pure function __ldca_i4x4(x) result(y)
+      !dir$ ignore_tkr (d) x
+      integer(4), intent(in) :: x(4)  ! input: 4-element integer(4) array
+      integer(4) :: y(4)              ! result: same kind and extent as x
+    end function __ldca_i4x4
+    attributes(device) pure function __ldca_i8x2(x) result(y)
+      !dir$ ignore_tkr (d) x
+      integer(8), intent(in) :: x(2)  ! input: 2-element integer(8) array
+      integer(8) :: y(2)              ! result: same kind and extent as x
+    end function __ldca_i8x2
+    attributes(device) pure function __ldca_r2x2(x) result(y)
+      !dir$ ignore_tkr (d) x
+      real(2), intent(in) :: x(2)  ! input: 2-element real(2) array
+      real(2) :: y(2)              ! result: same kind and extent as x
+    end function __ldca_r2x2
+    attributes(device) pure function __ldca_r4x4(x) result(y)
+      !dir$ ignore_tkr (d) x
+      real(4), intent(in) :: x(4)  ! input: 4-element real(4) array
+      real(4) :: y(4)              ! result: same kind and extent as x
+    end function __ldca_r4x4
+    attributes(device) pure function __ldca_r8x2(x) result(y)
+      !dir$ ignore_tkr (d) x
+      real(8), intent(in) :: x(2)  ! input: 2-element real(8) array
+      real(8) :: y(2)              ! result: same kind and extent as x
+    end function __ldca_r8x2
   end interface
 
   ! LDCS
@@ -1158,6 +1183,31 @@ attributes(device) integer function match_any_syncjd(mask, val)
       !dir$ ignore_tkr (d) x
       complex(8), intent(in) :: x
     end function
+    attributes(device) pure function __ldcs_i4x4(x) result(y)
+      !dir$ ignore_tkr (d) x
+      integer(4), intent(in) :: x(4)  ! input: 4-element integer(4) array
+      integer(4) :: y(4)              ! result: same kind and extent as x
+    end function __ldcs_i4x4
+    attributes(device) pure function __ldcs_i8x2(x) result(y)
+      !dir$ ignore_tkr (d) x
+      integer(8), intent(in) :: x(2)  ! input: 2-element integer(8) array
+      integer(8) :: y(2)              ! result: same kind and extent as x
+    end function __ldcs_i8x2
+    attributes(device) pure function __ldcs_r2x2(x) result(y)
+      !dir$ ignore_tkr (d) x
+      real(2), intent(in) :: x(2)  ! input: 2-element real(2) array
+      real(2) :: y(2)              ! result: same kind and extent as x
+    end function __ldcs_r2x2
+    attributes(device) pure function __ldcs_r4x4(x) result(y)
+      !dir$ ignore_tkr (d) x
+      real(4), intent(in) :: x(4)  ! input: 4-element real(4) array
+      real(4) :: y(4)              ! result: same kind and extent as x
+    end function __ldcs_r4x4
+    attributes(device) pure function __ldcs_r8x2(x) result(y)
+      !dir$ ignore_tkr (d) x
+      real(8), intent(in) :: x(2)  ! input: 2-element real(8) array
+      real(8) :: y(2)              ! result: same kind and extent as x
+    end function __ldcs_r8x2
   end interface
 
   ! LDLU
@@ -1198,6 +1248,31 @@ attributes(device) integer function match_any_syncjd(mask, val)
       !dir$ ignore_tkr (d) x
       complex(8), intent(in) :: x
     end function
+    attributes(device) pure function __ldlu_i4x4(x) result(y)
+      !dir$ ignore_tkr (d) x
+      integer(4), intent(in) :: x(4)  ! input: 4-element integer(4) array
+      integer(4) :: y(4)              ! result: same kind and extent as x
+    end function __ldlu_i4x4
+    attributes(device) pure function __ldlu_i8x2(x) result(y)
+      !dir$ ignore_tkr (d) x
+      integer(8), intent(in) :: x(2)  ! input: 2-element integer(8) array
+      integer(8) :: y(2)              ! result: same kind and extent as x
+    end function __ldlu_i8x2
+    attributes(device) pure function __ldlu_r2x2(x) result(y)
+      !dir$ ignore_tkr (d) x
+      real(2), intent(in) :: x(2)  ! input: 2-element real(2) array
+      real(2) :: y(2)              ! result: same kind and extent as x
+    end function __ldlu_r2x2
+    attributes(device) pure function __ldlu_r4x4(x) result(y)
+      !dir$ ignore_tkr (d) x
+      real(4), intent(in) :: x(4)  ! input: 4-element real(4) array
+      real(4) :: y(4)              ! result: same kind and extent as x
+    end function __ldlu_r4x4
+    attributes(device) pure function __ldlu_r8x2(x) result(y)
+      !dir$ ignore_tkr (d) x
+      real(8), intent(in) :: x(2)  ! input: 2-element real(8) array
+      real(8) :: y(2)              ! result: same kind and extent as x
+    end function __ldlu_r8x2
   end interface
 
   ! LDCV
@@ -1238,6 +1313,31 @@ attributes(device) integer function match_any_syncjd(mask, val)
       !dir$ ignore_tkr (d) x
       complex(8), intent(in) :: x
     end function
+    attributes(device) pure function __ldcv_i4x4(x) result(y)
+      !dir$ ignore_tkr (d) x
+      integer(4), intent(in) :: x(4)  ! input: 4-element integer(4) array
+      integer(4) :: y(4)              ! result: same kind and extent as x
+    end function __ldcv_i4x4
+    attributes(device) pure function __ldcv_i8x2(x) result(y)
+      !dir$ ignore_tkr (d) x
+      integer(8), intent(in) :: x(2)  ! input: 2-element integer(8) array
+      integer(8) :: y(2)              ! result: same kind and extent as x
+    end function __ldcv_i8x2
+    attributes(device) pure function __ldcv_r2x2(x) result(y)
+      !dir$ ignore_tkr (d) x
+      real(2), intent(in) :: x(2)  ! input: 2-element real(2) array
+      real(2) :: y(2)              ! result: same kind and extent as x
+    end function __ldcv_r2x2
+    attributes(device) pure function __ldcv_r4x4(x) result(y)
+      !dir$ ignore_tkr (d) x
+      real(4), intent(in) :: x(4)  ! input: 4-element real(4) array
+      real(4) :: y(4)              ! result: same kind and extent as x
+    end function __ldcv_r4x4
+    attributes(device) pure function __ldcv_r8x2(x) result(y)
+      !dir$ ignore_tkr (d) x
+      real(8), intent(in) :: x(2)  ! input: 2-element real(8) array
+      real(8) :: y(2)              ! result: same kind and extent as x
+    end function __ldcv_r8x2
   end interface
 
   ! STWB
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index c651d34c55093..feff9c31f12f2 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -198,3 +198,21 @@ end subroutine
 ! CHECK: %[[CAST:.*]] = builtin.unrealized_conversion_cast %{{.*}}#1 : !fir.ref<f64> to !llvm.ptr
 ! CHECK: %[[ATOMIC:.*]] = llvm.cmpxchg %[[CAST]], %[[BCAST1]], %[[BCAST2]] acq_rel monotonic : !llvm.ptr, i64
 ! CHECK: %[[RES:.*]] = llvm.extractvalue %[[ATOMIC]][1] : !llvm.struct<(i64, i1)> 
+
+
+attributes(global) subroutine __ldXX(b)
+  ! Calls each of the __ldca/__ldcg/__ldcs/__ldlu/__ldcv generics on a section
+  ! of an integer device array. The FileCheck directives after the subroutine
+  ! expect every call to resolve to the matching *_i4x4 specific.
+  ! NOTE(review): i and j are not declared here and rely on implicit typing.
+  integer, device :: b(*)
+  integer, device :: x(4)
+
+  x(1:4) = __ldca(b(i:j))
+  x = __ldcg(b(i:j))
+  x = __ldcs(b(i:j))
+  x(1:4) = __ldlu(b(i:j))
+  x(1:4) = __ldcv(b(i:j))
+end subroutine
+
+! CHECK-LABEL: func.func @_QP__ldxx
+! CHECK: __ldca_i4x4
+! CHECK: __ldcg_i4x4
+! CHECK: __ldcs_i4x4
+! CHECK: __ldlu_i4x4
+! CHECK: __ldcv_i4x4

@wangzpgi
Copy link
Contributor

wangzpgi commented Mar 7, 2025

Can you make tests for each of the cases added?

@clementval
Copy link
Contributor Author

> Can you make tests for each of the cases added?

I can ... but we don't have one for all the others that were previously there.

@clementval
Copy link
Contributor Author

> > Can you make tests for each of the cases added?
>
> I can ... but we don't have one for all the others that were previously there.

Just added some more.

@clementval clementval merged commit 5668c7b into llvm:main Mar 7, 2025
11 checks passed
@clementval clementval deleted the cuda_ldca branch March 7, 2025 18:19
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

`flang:fir-hlfir`, `flang` (Flang issues not falling into any other category)

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants